This is the results section for the Study 2 NSE & SE CHILDREN watching ASL Stories. We have two main factors:
We are taking out one older KODA (Ethan, 10.5 yrs) to balance the groups better.
library(tidyverse)
library(janitor)
library(lme4)
library(lmerTest)
library(scales)
library(feather)
library(GGally)
kids <- read_feather("cleanedchildeyedata.feather") %>%
# mutate(age = age*12) %>%
select(participant, language, age, gender, story, direction, mark, trial, repetition, aoi, secs, percent) %>%
rename(name = participant) %>%
filter(age < 9) %>% # Take out Ethan
# mutate(agegroup = case_when(
# age <= 8.99 ~ "younger",
# age >= 9.0 & age < 15 ~ "older"
# )) %>%
# filter(!is.na(agegroup)) %>%
mutate(language = case_when(
language == "english" ~ "NSE",
language =="sign" ~ "SE"
)) %>%
rename(lang = language)
kidsinfo <- kids %>%
select(name, lang, age, gender) %>%
distinct() %>%
group_by(lang) %>%
summarise(N = n(),
age_mean = mean(age),
sd = sd(age),
min = min(age),
max = max(age))
genders <- kids %>%
select(name, lang, age, gender) %>%
distinct() %>%
group_by(lang, gender) %>%
summarise(N = n()) %>%
spread(gender, N)
kidsinfo <- left_join(kidsinfo, genders) %>%
select(lang, N, Female, Male, age_mean, sd, min, max) %>%
print()# babies$agegroup <- fct_relevel(babies$agegroup, c("younger","older"))
# IF we do age groups, use this code
#
# babiesinfo <- babies %>%
# select(name, lang, age, agegroup, gender) %>%
# distinct() %>%
# group_by(lang, agegroup) %>%
# summarise(N = n(),
# age_mean = mean(age),
# sd = sd(age),
# min = min(age),
# max = max(age))
#
# genders <- babies %>%
# select(name, lang, age, agegroup, gender) %>%
# distinct() %>%
# group_by(lang, agegroup, gender) %>%
# summarise(N = n()) %>%
# spread(gender, N)
#
# babiesinfo <- left_join(babiesinfo, genders) %>%
# select(lang, agegroup, N, Female, Male, age_mean, sd, min, max) %>%
# print()Let’s plot the ages, and check if there is significant difference in ages between the two groups?
# Boxplot
kids %>%
select(name, age, lang) %>%
distinct() %>%
ggplot(aes(x = lang, y = age, fill = lang)) + geom_boxplot(width = 0.5) + guides(fill = FALSE)kids %>%
select(name, age, lang) %>%
distinct() %>%
ggplot(aes(x = age, fill = lang)) + geom_histogram() + facet_grid(lang ~ .)# T-test
nse_age <- kids %>% filter(lang == "NSE") %>% select(name, age) %>% distinct()
se_age <- kids %>% filter(lang == "SE") %>% select(name, age) %>% distinct()
t.test(nse_age$age, se_age$age)
Welch Two Sample t-test
data: nse_age$age and se_age$age
t = 0.14316, df = 32.568, p-value = 0.887
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.020020 1.174354
sample estimates:
mean of x mean of y
4.986667 4.909500
For children, we calculated percentages based on overall clip length as the denominator. In this way, we can meaningfully contrast looking times at the videos (which are variable lengths) based on different factors. But when we go to AOI analysis we need to re-calculate the percentages so the denominator is based on total looking time, not overall clip length.
The chart below shows there seems to be an effect of age; older kids look longer at it than younger kids. Maybe not too surprising. It means we need to keep age in any models we run. Let’s analyze a bit more below.
kids$lang <- as.factor(kids$lang)
kids_overall_looking <- kids %>%
group_by(name, age, lang, direction, story, repetition) %>%
summarise(percent = sum(percent)) # gets total looking percent for each trial for each kid
# Table of means
kids_overall_looking %>%
group_by(name, lang, direction) %>%
summarise(percent = mean(percent)) %>% # get average looking percent for each kid
group_by(lang, direction) %>%
summarise(mean_percent = mean(percent),
count = n(),
sd = sd(percent),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()ggplot(kids_overall_looking, aes(x = age, y = percent, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
facet_grid(. ~ lang) +
geom_smooth(method = "lm", se = TRUE) +
ggtitle("Video Attention") +
xlab("age (months)") +
ylab("percent looking") +
theme_bw() +
scale_y_continuous(limits = c(0,1), labels = percent) # Plot
# babies_overall_looking %>%
# group_by(lang, direction, name) %>%
# summarise(percent = mean(percent)) %>% # gets average looking percent for each baby
# group_by(lang, direction) %>%
# summarise(mean_percent = mean(percent), # gets group averages
# count = n(),
# sd = sd(percent),
# se = sd/sqrt(count)) %>%
# ggplot(aes(x = lang, y = mean_percent, fill = direction)) +
# geom_col(position = "dodge") +
# geom_errorbar(aes(ymin = mean_percent - se, ymax = mean_percent + se),
# position = position_dodge(width = 0.9), width = 0.25) +
# scale_y_continuous(limits = c(0,1), labels = percent) +
# theme_minimal() +
# theme(panel.grid.major.x = element_blank()) +
# # facet_wrap("lang") +
# ggtitle("Video Attention") +
# xlab("") +
# ylab("percent looking")
# babies_overall_looking %>%
# ggplot(aes(x = lang, y = percent, fill = direction)) +
# facet_wrap("agegroup") +
# geom_violin()A linear model shows a significant effect of age. Overall, Age seems to increase overall looking by about 3% every year. However, there are no differences between NSE v. SE, or reversal, on how long they looked, so that’s good.
global_lm <- lmer(percent ~ age + lang * direction + (1|name) + (1|story), data = kids_overall_looking)
summary(global_lm)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: percent ~ age + lang * direction + (1 | name) + (1 | story)
Data: kids_overall_looking
REML criterion at convergence: -112.3
Scaled residuals:
Min 1Q Median 3Q Max
-2.4984 -0.6942 0.2184 0.7802 2.5040
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 0.0071099 0.08432
story (Intercept) 0.0003724 0.01930
Residual 0.0397616 0.19940
Number of obs: 471, groups: name, 35; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.582022 0.062000 34.960192 9.387 4.37e-11 ***
age 0.032069 0.010836 30.900877 2.959 0.00587 **
langSE 0.032270 0.039171 51.078694 0.824 0.41386
directionreversed -0.031150 0.028318 375.529039 -1.100 0.27203
langSE:directionreversed -0.003912 0.037809 384.535230 -0.103 0.91765
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.872
langSE -0.376 0.019
dirctnrvrsd -0.234 0.004 0.368
lngSE:drctn 0.179 -0.006 -0.482 -0.756
#ggcoef(global_lm)Now we’ll re-calculate the percentages so the denominator is based on total looking time. All AOIs should sum up to 100% for each trial and each baby. Next let’s make a boxplot of all AOIs. Interesting, definitely more MidFaceBottom focus here than we had with babies, but also more distribution too.
# Recalculate percent
kids <- kids %>%
select(-percent) %>%
group_by(name, lang, age, direction, story, mark, trial, repetition) %>%
mutate(totalsec = sum(secs)) %>%
group_by(name, lang, age, direction, story, mark, trial, repetition, aoi) %>%
summarise(percent = secs/totalsec)
# Boxplot
kids %>%
ggplot(aes(x = aoi, y = percent, fill = direction)) +
geom_boxplot() +
ggtitle("AOI Attention") +
theme_bw() +
xlab("") +
theme(axis.text.x = element_text(angle=45, hjust = 1),
panel.grid.major.x = element_blank()) +
scale_y_continuous(labels = scales::percent, limits = c(0,1))It appears two important AOIs are MidChestTop and MidFaceBottom. Let’s look again only at midline AOIs:
midline = c("Belly","BelowChest","MidChestBottom","MidChestCenter","MidChestTop",
"MidFaceBottom","MidFaceCenter","MidFaceTop")
kids %>%
filter(aoi %in% midline) %>%
ggplot(aes(x = aoi, y = percent, fill = direction)) +
geom_boxplot() +
ggtitle("Midline AOI Attention") +
theme_bw() +
xlab("") +
theme(axis.text.x = element_text(angle=45, hjust = 1),
panel.grid.major.x = element_blank()) +
scale_y_continuous(labels = scales::percent, limits = c(0,1))I’m going to run linear models with only MidChestTop or MidFaceBottom, and see what happens. No age interactions.
MidChestTop:
MidFaceBottom:
kids %>%
filter(aoi %in% c("MidFaceBottom","MidChestTop")) %>%
ggplot(aes(x = age, y = percent, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
scale_y_continuous(limits = c(0,1), labels = percent) +
theme_bw() +
# theme(panel.grid.major.x = element_blank()) +
facet_grid(aoi ~ lang) +
ggtitle("AOI Attention") +
xlab("") +
ylab("percent looking")midchesttop_lm <- lmer(percent ~ age + lang * direction + (1|name) + (1|story), data = filter(kids, aoi == "MidChestTop"))
summary(midchesttop_lm)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: percent ~ age + lang * direction + (1 | name) + (1 | story)
Data: filter(kids, aoi == "MidChestTop")
REML criterion at convergence: -108
Scaled residuals:
Min 1Q Median 3Q Max
-2.0759 -0.6139 -0.1635 0.5257 3.4431
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 0.0236538 0.15380
story (Intercept) 0.0001076 0.01037
Residual 0.0375729 0.19384
Number of obs: 471, groups: name, 35; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.332735 0.097931 33.690951 3.398 0.00176 **
age -0.004869 0.017507 32.325737 -0.278 0.78269
langSE -0.067047 0.058495 39.321521 -1.146 0.25863
directionreversed -0.047327 0.027264 337.964771 -1.736 0.08350 .
langSE:directionreversed 0.049457 0.036436 348.960799 1.357 0.17554
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.892
langSE -0.360 0.022
dirctnrvrsd -0.142 0.003 0.235
lngSE:drctn 0.109 -0.004 -0.310 -0.751
#ggcoef(midchesttop_lm)
midfacebottom_lm <- lmer(percent ~ age + lang * direction + (1|name) + (1|story), data = filter(kids, aoi == "MidFaceBottom"))
summary(midfacebottom_lm)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: percent ~ age + lang * direction + (1 | name) + (1 | story)
Data: filter(kids, aoi == "MidFaceBottom")
REML criterion at convergence: 27
Scaled residuals:
Min 1Q Median 3Q Max
-2.6917 -0.6530 -0.0125 0.7073 2.5459
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 0.025608 0.16003
story (Intercept) 0.001334 0.03652
Residual 0.050261 0.22419
Number of obs: 471, groups: name, 35; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.359107 0.104202 34.707523 3.446 0.00151 **
age -0.005518 0.018451 32.184210 -0.299 0.76684
langSE 0.188369 0.062415 41.101136 3.018 0.00436 **
directionreversed -0.005451 0.032290 418.172403 -0.169 0.86602
langSE:directionreversed -0.049417 0.043090 421.681027 -1.147 0.25210
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.883
langSE -0.361 0.021
dirctnrvrsd -0.158 0.002 0.266
lngSE:drctn 0.122 -0.003 -0.345 -0.762
#ggcoef(midfacebottom_lm)
# Bar chart
# babies %>%
# filter(aoi %in% c("MidFaceBottom","MidChestTop")) %>%
# group_by(agegroup, lang, direction, name, aoi) %>%
# summarise(percent = mean(percent)) %>% # gets average looking percent for each baby
# group_by(agegroup, lang, direction, aoi) %>%
# summarise(mean_percent = mean(percent), # gets group averages
# count = n(),
# sd = sd(percent),
# se = sd/sqrt(count)) %>%
# ggplot(aes(x = lang, y = mean_percent, fill = direction)) +
# geom_col(position = "dodge") +
# geom_errorbar(aes(ymin = mean_percent - se, ymax = mean_percent + se),
# position = position_dodge(width = 0.9), width = 0.25) +
# scale_y_continuous(limits = c(0,1), labels = percent) +
# theme_minimal() +
# theme(panel.grid.major.x = element_blank()) +
# facet_grid(aoi ~ agegroup) +
# ggtitle("Video Attention") +
# xlab("") +
# ylab("percent looking")Next, we’ll define a Face-Chest Ratio (FCR) such that:
We did not include Belly or MidFaceTop because of very low looking rates according to the boxplots above.
kids_fcr <- kids %>%
spread(aoi,percent) %>%
group_by(name, age, lang, direction, story, repetition) %>%
summarise(face = sum(MidFaceCenter, MidFaceBottom, na.rm = TRUE),
chest = sum(MidChestTop, MidChestCenter, MidChestBottom, BelowChest, na.rm = TRUE),
fcr = (face - chest) / (face + chest))
# Table of means
kids_fcr %>%
group_by(lang, direction, name) %>%
summarise(fcr = mean(fcr)) %>% # gets average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_fcr = mean(fcr), # gets group averages
count = n(),
sd = sd(fcr),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()# Plot
ggplot(kids_fcr, aes(x = age, y = fcr, color = direction, fill = direction)) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
scale_y_continuous(limits = c(-1,1)) +
theme_bw() +
theme(panel.grid.major.x = element_blank()) +
facet_grid(. ~ lang) +
ggtitle("Face-Chest Ratios") +
xlab("") +
ylab("FCR")# Bar chart
# babies_fcr %>%
# group_by(agegroup, lang, direction, name) %>%
# summarise(fcr = mean(fcr)) %>% # gets average looking percent for each baby
# group_by(agegroup, lang, direction) %>%
# summarise(mean_fcr = mean(fcr), # gets group averages
# count = n(),
# sd = sd(fcr),
# se = sd/sqrt(count)) %>%
# ggplot(aes(x = lang, y = mean_fcr, fill = direction)) +
# geom_col(position = "dodge") +
# geom_errorbar(aes(ymin = mean_fcr - se, ymax = mean_fcr + se),
# position = position_dodge(width = 0.9), width = 0.25) +
# scale_y_continuous(limits = c(-1,1)) +
# theme_minimal() +
# theme(panel.grid.major.x = element_blank()) +
# facet_wrap("agegroup") +
# ggtitle("Face-Chest Ratios") +
# xlab("") +
# ylab("FCR")What will a linear mixed model tell us? (with no age interactions)
fcr_lm <- lmer(fcr ~ age + lang * direction + (1|name) + (1|story), data = kids_fcr)
summary(fcr_lm)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: fcr ~ age + lang * direction + (1 | name) + (1 | story)
Data: kids_fcr
REML criterion at convergence: 747.9
Scaled residuals:
Min 1Q Median 3Q Max
-3.04087 -0.55798 0.04156 0.68150 2.93235
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 0.17809 0.4220
story (Intercept) 0.01392 0.1180
Residual 0.22797 0.4775
Number of obs: 471, groups: name, 35; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.051211 0.268645 34.803179 -0.191 0.8499
age -0.001861 0.047533 32.187271 -0.039 0.9690
langSE 0.357443 0.157942 38.316970 2.263 0.0294 *
directionreversed -0.047553 0.069424 433.299141 -0.685 0.4937
langSE:directionreversed -0.068533 0.092601 434.179175 -0.740 0.4596
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.882
langSE -0.355 0.022
dirctnrvrsd -0.132 0.001 0.227
lngSE:drctn 0.102 -0.002 -0.293 -0.766
#ggcoef(fcr_lm)I want to try to visualize reversal effects a different way. Maybe this.
# Get participant-level data
kids_fcr2 <- kids_fcr %>%
group_by(name, age, lang, direction) %>%
summarise(fcr = mean(fcr))
# reversal_effect_lm <- lmer(fcr ~ age + lang * direction + (1|name), data = kids_fcr2)
# summary(reversal_effect_lm)
ggplot(kids_fcr2, aes(x = direction, y = fcr, color = lang, fill = lang)) +
geom_point() +
geom_line(aes(group = name)) +
facet_grid(. ~ lang) +
scale_y_continuous(limits = c(-1,1)) +
theme_bw()Or a reversal effect chart? Okay, so this chart tells us overall there really wasn’t much of a reversal effect for SE babies, they’re all hovering around 0. Interesting. While there seems to be a reversal effect for NSE babies where they look at the face more during reversed stories!
# Get participant-level data
kids_fcr3 <- kids_fcr2 %>%
spread(direction, fcr) %>%
group_by(name, age, lang) %>%
summarise(diff = forward - reversed)
ggplot(kids_fcr3, aes(x = age, y = diff, color = lang)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
scale_y_continuous(limits = c(-1,1)) +
theme_bw() +
ggtitle("Reversal Effect") +
ylab("Forward FCR - Reversed FCR")And within-subjects variation here:
# First get the mean of each trial, THEN the participant-level means
within_subjects <- kids_fcr %>%
group_by(name, lang, direction, story, repetition) %>%
summarise(fcr = mean(fcr, na.rm = TRUE),
count = n()) %>%
group_by(name, lang, direction) %>%
summarise(mean = mean(fcr, na.rm = TRUE),
se = sd(fcr, na.rm = TRUE)/sqrt(n()),
count = n())
# Then spread out mean and SE columns by direction
within_subjects_means <- within_subjects %>%
select(-se, -count) %>%
spread(direction, mean, sep = "_")
within_subjects_se <- within_subjects %>%
select(-mean, -count) %>%
spread(direction, se, sep = "SE")
within_subjects <- left_join(within_subjects_means, within_subjects_se, by = c("name","lang"))
# Now let's plot
lims <- c(-1,1)
within_subjects %>%
ggplot(aes(x = direction_forward, y = direction_reversed, color = lang)) +
geom_abline() +
geom_point(size = 2) +
geom_errorbar(aes(ymin=direction_reversed-directionSEreversed, ymax=direction_reversed+directionSEreversed)) +
geom_errorbarh(aes(xmin=direction_forward-directionSEforward, xmax=direction_forward+directionSEforward)) +
theme_bw() +
theme(aspect.ratio = 1) +
scale_x_continuous("forward", limits = c(-1,1)) +
scale_y_continuous("reversed", limits = c(-1,1)) +
ggtitle("FCR Means") +
facet_wrap("lang")And a classic box/error plot with age collapsed.
kids_fcr2 %>%
group_by(lang, direction) %>%
summarise(fcr_mean = mean(fcr),
sd = sd(fcr),
n = n(),
se = sd/sqrt(n)) %>%
ggplot(aes(x = lang, y = fcr_mean, fill = direction)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = fcr_mean-se, ymax = fcr_mean+se), position = position_dodge(0.9), width = 0.2) +
scale_y_continuous(limits = c(-0.5, 0.5)) +
theme_linedraw()And now heat maps!
heatmap_kids <- kids %>%
filter(aoi %in% midline) %>%
ungroup() %>%
group_by(lang, name, direction, aoi) %>%
summarise(percent = mean(percent, na.rm=TRUE)) %>%
group_by(lang, direction, aoi) %>%
summarise(percent = mean(percent, na.rm=TRUE)) %>%
ungroup() %>%
mutate(aoi = factor(aoi, levels = c("Belly","BelowChest","MidChestBottom","MidChestCenter","MidChestTop",
"MidFaceBottom","MidFaceCenter","MidFaceTop")))
ggplot(heatmap_kids, aes(x = lang, y = aoi)) +
geom_tile(aes(fill=percent),color="lightgray",na.rm=TRUE) +
# scale_fill_viridis(option = "viridis", direction=-1, limits = c(0,.7), labels = percent, name = "looking time") +
scale_fill_gradient(low = "#ffffff", high = "#08519c", space = "Lab", limits = c(0,.52), labels = percent, name = "looking time", na.value = "grey50") +
theme_bw() +
theme(strip.text.x = element_text(size = 11, color = "black", face = "italic"),
strip.background = element_rect(colour = "white", fill = "white"),
panel.grid.major = element_line(color = "white")) +
facet_grid(. ~ direction) +
ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Direction") +
scale_y_discrete(expand=c(0,0)) +
scale_x_discrete(expand = c(0,0))ggplot(heatmap_kids, aes(x = direction, y = aoi)) +
geom_tile(aes(fill=percent),color="lightgray",na.rm=TRUE) +
# scale_fill_viridis(option = "viridis", direction=-1, limits = c(0,.7), labels = percent, name = "looking time") +
scale_fill_gradient(low = "#ffffff", high = "#08519c", space = "Lab", limits = c(0,.52), labels = percent, name = "looking time", na.value = "grey50") +
theme_bw() +
theme(strip.text.x = element_text(size = 11, color = "black", face = "italic"),
strip.background = element_rect(colour = "white", fill = "white"),
panel.grid.major = element_line(color = "white")) +
facet_grid(. ~ lang) +
ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Language") +
scale_y_discrete(expand=c(0,0)) +
scale_x_discrete(expand = c(0,0))# All Data
#Here's all AOI data.
kids %>%
ungroup() %>%
group_by(lang, name, direction, aoi) %>%
summarise(percent = mean(percent, na.rm=TRUE)) %>%
group_by(lang, direction, aoi) %>%
summarise(percent = mean(percent, na.rm=TRUE)) %>%
openxlsx::write.xlsx("~/Desktop/kids_by_direction.xlsx")
kids %>%
ungroup() %>%
group_by(lang, name, direction, aoi) %>%
summarise(percent = mean(percent, na.rm=TRUE)) %>%
group_by(lang, direction, aoi) %>%
summarise(percent = mean(percent, na.rm=TRUE)) %>%
group_by(lang, aoi) %>%
summarise(percent = mean(percent, na.rm=TRUE)) %>%
openxlsx::write.xlsx("~/Desktop/kids_collapsed.xlsx")No big changes from the ICSLA abstract. Good!
The interpretation here is that:
That doesn’t mean both groups of children don’t care about reversal. On the contrary. We can hypothesize that SE kids have efficient gaze behavior and are resilient to reversal; while NSE kids already are “inefficient” and changing the video stimulus isn’t going to help. But how do we test that? Maybe let’s look at within-subject variation.
Let’s try correlations.
# Let's try correlations
kids_nse <- kids %>%
filter(aoi %in% midline) %>%
filter(lang == "NSE") %>%
group_by(name, direction, aoi) %>%
summarise(percent = mean(percent)) %>%
ungroup() %>%
mutate(direction = case_when(
direction == "forward" ~ "fw",
direction == "reversed" ~ "rv"
)) %>%
unite(aoi2, direction, aoi, sep = "_") %>%
spread(aoi2, percent) %>%
select(-name)
kids_se <- kids %>%
filter(aoi %in% midline) %>%
filter(lang == "SE") %>%
group_by(name, direction, aoi) %>%
summarise(percent = mean(percent)) %>%
ungroup() %>%
mutate(direction = case_when(
direction == "forward" ~ "fw",
direction == "reversed" ~ "rv"
)) %>%
unite(aoi2, direction, aoi, sep = "_") %>%
spread(aoi2, percent) %>%
select(-name)
ggcorr(kids_nse, label = TRUE, label_size = 5, label_round = 2, label_alpha = TRUE, hjust = 0.9, size = 5, color = "grey50", layout.exp = 1) + ggtitle("NSE")ggcorr(kids_se, label = TRUE, label_size = 5, label_round = 2, label_alpha = TRUE, hjust = 0.9, size = 5, color = "grey50", layout.exp = 1) + ggtitle("SE")library(corrr)
kids_nse %>% correlate() %>% network_plot(min_cor=0.6) + ggtitle("NSE Children")kids_se %>% correlate() %>% network_plot(min_cor=0.6) + ggtitle("SE Children")We’ll load the data from the childxydata.feather file made in 06rawxydata.Rmd. So any new kids, please run the first code block in 06 to include it. Then we’ll keep all the kids we also have in the AOI data group.
included <- kids %>%
ungroup() %>%
select(name) %>%
distinct() %>%
unlist()
xydata <- read_feather("../Child Data/childxydata.feather") %>%
rename(name = participant) %>%
filter(name %in% included)
# Get ages
ages <- read_csv("childrenages.csv") %>%
rename(name = participant)
xydata <- xydata %>% left_join(ages, by = "name") %>%
mutate(age = age*12) %>%
mutate(agegroup = case_when(
age <= 8.99 ~ "younger",
age >= 9.0 & age < 15 ~ "older"
)) %>%
mutate(language = case_when(
language == "EnglishExposed" ~ "NSE",
language == "SignLanguageExposed" ~ "SE"
)) %>%
rename(lang = language) %>%
select(name, group, gender, lang, condition, mark, trial, repetition, x, y, age, agegroup) %>%
separate(condition, into = c("story", "clip", "direction")) %>%
unite("story", c("story", "clip")) %>%
mutate(direction = case_when(
direction == "ER" ~ "reversed",
direction == "FW" ~ "forward"
)) %>%
mutate(name = factor(name),
group = factor(group),
gender = factor(gender),
lang = factor(lang),
story = factor(story),
direction = factor(direction),
mark = factor(mark),
trial = factor(trial),
repetition = factor(repetition),
agegroup = factor(agegroup))Let’s check that we have no significant group or condition differences in terms of valid (not empty) data points collected. This is same as “Global Looking” we have above, really, but w raw xy data.
xy_overall <- xydata %>%
filter(!is.na(x)) %>%
group_by(name, age, lang, direction, story, repetition) %>%
summarise(data_points = n()) # gets total looking percent for each trial for each baby
# Table of means
xy_overall %>%
group_by(name, lang, direction) %>%
summarise(data_points = mean(data_points)) %>% # get average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_data_points = mean(data_points),
count = n(),
sd = sd(data_points),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()ggplot(xy_overall, aes(x = age, y = data_points, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
facet_grid(. ~ lang) +
geom_smooth(method = "lm", se = FALSE) +
ggtitle("Data Points") +
xlab("age (months)") +
ylab("data points recorded") +
theme_bw() Description.
overall_xy_lm <- lmer(data_points ~ age + lang * direction + (direction|name) + (direction|story), data = xy_overall)
summary(overall_xy_lm) Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: data_points ~ age + lang * direction + (direction | name) + (direction | story)
Data: xy_overall
REML criterion at convergence: 7516.2
Scaled residuals:
Min 1Q Median 3Q Max
-3.0141 -0.5798 0.1909 0.7325 2.2753
Random effects:
Groups Name Variance Std.Dev. Corr
name (Intercept) 1.896e+04 137.6804
directionreversed 5.468e-02 0.2338 1.00
story (Intercept) 4.170e+04 204.1974
directionreversed 9.717e+03 98.5770 -0.44
Residual 6.768e+04 260.1533
Number of obs: 535, groups: name, 35; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 525.457 117.894 29.272 4.457 0.000112 ***
age 2.465 1.397 32.977 1.765 0.086892 .
langSE 5.896 57.877 32.089 0.102 0.919493
directionreversed -17.944 49.949 12.211 -0.359 0.725547
langSE:directionreversed 10.156 47.247 487.661 0.215 0.829889
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.697
langSE -0.278 -0.009
dirctnrvrsd -0.297 -0.002 0.227
lngSE:drctn 0.114 0.005 -0.406 -0.552
#ggcoef(overall_xy_lm)Now we’re going to run LMMs on babies’ raw:
But to do this we first trim each kid’s data, getting rid of the first 60 samples (0.50 secs) of each trial.
xydata <- xydata %>%
group_by(name,trial) %>%
slice(30:n())
iqr <- xydata %>%
group_by(name, age, lang, story, direction, trial) %>%
summarise(xIQR = IQR(x,na.rm=TRUE),
yIQR = IQR(y,na.rm=TRUE),
xmed = median(x, na.rm=TRUE),
ymed = median(y, na.rm=TRUE),
area = xIQR*yIQR)
head(iqr,20)Description.
xiqr_mean <- iqr %>%
group_by(lang, direction, name) %>%
summarise(xIQR = mean(xIQR, na.rm = T)) %>% # gets average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_xIQR = mean(xIQR), # gets group averages
count = n(),
sd = sd(xIQR),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()# Plot
ggplot(iqr, aes(x = age, y = xIQR, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
theme_bw() +
theme(panel.grid.major.x = element_blank()) +
facet_grid(. ~ lang) +
ggtitle("Horizontal Spread") +
xlab("") +
ylab("xIQR")ggplot(xiqr_mean, aes(x = lang, y = mean_xIQR, fill = direction)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = mean_xIQR-se, ymax = mean_xIQR+se), position = position_dodge(0.9), width = 0.2) +
theme_linedraw()xiqr_lm <- lmer(xIQR ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(xiqr_lm)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: xIQR ~ age + lang * direction + (1 | name) + (1 | story)
Data: iqr
REML criterion at convergence: 5145.4
Scaled residuals:
Min 1Q Median 3Q Max
-1.3232 -0.4314 -0.1666 0.1539 12.5401
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 62.388 7.899
story (Intercept) 8.698 2.949
Residual 878.268 29.636
Number of obs: 534, groups: name, 35; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 37.50826 6.90369 39.33107 5.433 3.1e-06 ***
age -0.05562 0.09857 32.45361 -0.564 0.576
langSE -0.93298 4.55974 69.15306 -0.205 0.838
directionreversed 3.17802 3.95955 434.71975 0.803 0.423
langSE:directionreversed -2.66564 5.25874 457.71103 -0.507 0.612
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.853
langSE -0.389 0.012
dirctnrvrsd -0.287 -0.002 0.439
lngSE:drctn 0.212 0.007 -0.572 -0.759
#ggcoef(xiqr_lm)Description.
yiqr_mean <- iqr %>%
group_by(lang, direction, name) %>%
summarise(yIQR = mean(yIQR, na.rm = T)) %>% # gets average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_yIQR = mean(yIQR), # gets group averages
count = n(),
sd = sd(yIQR),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()# Plot
ggplot(iqr, aes(x = age, y = yIQR, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
theme_bw() +
theme(panel.grid.major.x = element_blank()) +
facet_grid(. ~ lang) +
ggtitle("Vertical Spread") +
xlab("") +
ylab("yIQR")ggplot(yiqr_mean, aes(x = lang, y = mean_yIQR, fill = direction)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = mean_yIQR-se, ymax = mean_yIQR+se), position = position_dodge(0.9), width = 0.2) +
theme_linedraw()yiqr_lm <- lmer(yIQR ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(yiqr_lm)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: yIQR ~ age + lang * direction + (1 | name) + (1 | story)
Data: iqr
REML criterion at convergence: 5653.8
Scaled residuals:
Min 1Q Median 3Q Max
-1.5596 -0.5064 -0.2414 0.1097 5.9864
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 300.671 17.340
story (Intercept) 9.835 3.136
Residual 2251.601 47.451
Number of obs: 534, groups: name, 35; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 59.0444 13.0515 37.0017 4.524 6.07e-05 ***
age -0.1151 0.1903 32.8125 -0.605 0.5493
langSE -10.5298 8.3370 56.5536 -1.263 0.2118
directionreversed 10.9449 6.2896 400.3400 1.740 0.0826 .
langSE:directionreversed -10.0338 8.3652 432.0794 -1.199 0.2310
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.872
langSE -0.378 0.015
dirctnrvrsd -0.241 -0.001 0.381
lngSE:drctn 0.178 0.006 -0.497 -0.755
#ggcoef(yiqr_lm)Description.
area_mean <- iqr %>%
group_by(lang, direction, name) %>%
summarise(area = mean(area, na.rm = T)) %>% # gets average looking percent for each baby
group_by(lang, direction) %>%
summarise(area_mean = mean(area), # gets group averages
count = n(),
sd = sd(area),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()# Plot
ggplot(iqr, aes(x = age, y = area, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
theme_bw() +
theme(panel.grid.major.x = element_blank()) +
facet_grid(. ~ lang) +
ggtitle("Viewing Area") +
xlab("") +
ylab("Area (px^2)")ggplot(area_mean, aes(x = lang, y = area_mean, fill = direction)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = area_mean-se, ymax = area_mean+se), position = position_dodge(0.9), width = 0.2) +
theme_linedraw()area_lm <- lmer(area ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(area_lm)Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: area ~ age + lang * direction + (1 | name) + (1 | story)
Data: iqr
REML criterion at convergence: 10618.2
Scaled residuals:
Min 1Q Median 3Q Max
-1.0106 -0.2805 -0.1782 -0.0233 15.8517
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 1710669 1307.9
story (Intercept) 31917 178.7
Residual 27599545 5253.5
Number of obs: 534, groups: name, 35; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 2544.897 1172.877 39.578 2.170 0.0361 *
age -8.878 16.888 33.501 -0.526 0.6026
langSE 175.276 786.530 72.890 0.223 0.8243
directionreversed 881.472 691.160 362.380 1.275 0.2030
langSE:directionreversed -1285.287 920.306 402.617 -1.397 0.1633
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.860
langSE -0.393 0.012
dirctnrvrsd -0.295 -0.001 0.442
lngSE:drctn 0.217 0.007 -0.580 -0.752
#ggcoef(area_lm)medians <- iqr %>%
group_by(name,lang,direction) %>%
summarise(xIQR = mean(xIQR,na.rm=TRUE),
yIQR = mean(yIQR,na.rm=TRUE),
xmed = mean(xmed,na.rm=TRUE),
ymed = mean(ymed,na.rm=TRUE)) %>%
group_by(lang,direction) %>%
summarise(xIQR = mean(xIQR,na.rm=TRUE),
yIQR = mean(yIQR,na.rm=TRUE),
x = mean(xmed,na.rm=TRUE),
y = mean(ymed,na.rm=TRUE)) %>%
mutate(y = y*-1,
xmin = x-(xIQR/2),
xmax = x+(xIQR/2),
ymin = y-(yIQR/2),
ymax = y+(yIQR/2))
img <- png::readPNG("cindy.png")
g <- grid::rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc"))
ggplot(medians, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_linedraw() +
scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
facet_wrap("lang")First let’s prep the data.
multiples <- xydata %>%
filter(!is.na(x)) %>%
filter(!is.na(y)) %>%
group_by(name, age, lang, story, direction, trial) %>%
summarise(xIQR = IQR(x,na.rm=TRUE),
yIQR = IQR(y,na.rm=TRUE),
xmed = median(x, na.rm=TRUE),
ymed = median(y, na.rm=TRUE),
area = xIQR*yIQR,
x_90 = quantile(x, .95, na.rm=TRUE) - quantile(x, .05, na.rm=TRUE),
y_90 = quantile(y, .95, na.rm=TRUE) - quantile(y, .05, na.rm=TRUE),
area_90 = (x_90) * (y_90),
x_mean = mean(x, na.rm = TRUE),
y_mean = mean(y, na.rm = TRUE),
x_sd = sd(x, na.rm = TRUE),
y_sd = sd(y, na.rm = TRUE),
x_1sd = (x_mean+x_sd) - (x_mean-x_sd),
y_1sd = (y_mean+y_sd) - (y_mean-y_sd),
area_1sd = x_1sd * y_1sd,
x_2sd = (x_mean+(x_sd*2)) - (x_mean-(x_sd*2)),
y_2sd = (y_mean+(y_sd*2)) - (y_mean-(y_sd*2)),
area_2sd = x_2sd * y_2sd) %>%
group_by(name, lang, direction) %>%
summarise_if(is.double, funs(mean), na.rm = T) %>%
group_by(lang, direction) %>%
summarise_if(is.double, funs(mean), na.rm = T)
img <- png::readPNG("cindy.png")
g <- grid::rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc")) Let’s see.
curr_data <- multiples %>%
ungroup() %>%
select(lang, direction, xmed, ymed, xIQR, yIQR) %>%
group_by(lang, direction) %>%
summarise(xmin = xmed-(xIQR/2),
xmax = xmed+(xIQR/2),
ymin = -1*(ymed-(yIQR/2)),
ymax = -1*(ymed+(yIQR/2)))
ggplot(curr_data, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_linedraw() +
scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
facet_wrap("lang")So I calculated the average median across, and the middle 90% of the data.
curr_data <- multiples %>%
ungroup() %>%
select(lang, direction, xmed, ymed, x_90, y_90) %>%
group_by(lang, direction) %>%
summarise(xmin = xmed-(x_90/2),
xmax = xmed+(x_90/2),
ymin = -1*(ymed-(y_90/2)),
ymax = -1*(ymed+(y_90/2)))
ggplot(curr_data, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_linedraw() +
scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
facet_wrap("lang")So this is using the mean of the means, plus or minus one SD. This is equivalent to middle 68%.
curr_data <- multiples %>%
ungroup() %>%
select(lang, direction, x_mean, y_mean, x_1sd, y_1sd) %>%
group_by(lang, direction) %>%
summarise(xmin = x_mean-(x_1sd/2),
xmax = x_mean+(x_1sd/2),
ymin = -1*(y_mean-(y_1sd/2)),
ymax = -1*(y_mean+(y_1sd/2)))
ggplot(curr_data, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_linedraw() +
scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
facet_wrap("lang")And this is using the mean of the means, plus or minus two SD. This is equivalent to middle 96%.
curr_data <- multiples %>%
ungroup() %>%
select(lang, direction, x_mean, y_mean, x_2sd, y_2sd) %>%
group_by(lang, direction) %>%
summarise(xmin = x_mean-(x_2sd/2),
xmax = x_mean+(x_2sd/2),
ymin = -1*(y_mean-(y_2sd/2)),
ymax = -1*(y_mean+(y_2sd/2)))
ggplot(curr_data, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_linedraw() +
scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
facet_wrap("lang")